1 Names & Workflow

Joe Margolis Quinn Hodgman Max Menache Devinn Chi Graham Elliot

2 Topic & Research Questions

Topic: Baseball. Research question 1: How have batting average trends compared over time to OPS+ (we might change OPS+ to another statistic)? Comparisons to make: playoff vs. regular season; team average stats, comparing playoff and non-playoff teams; and single-player averages, comparing all-stars and non-all-stars.

Research question 2: What are the trends in each type of hit over time, comparing singles, doubles, triples, and home runs?

Research question 3: How do players usually perform after a mid-season trade?

Research question 4: Which type of hit (single, double, triple, HR) best correlates with the value of the player?

library(Lahman)
data(Batting)
data(Teams)
data(BattingPost)
library(tufte)
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ ggplot2 3.3.5     ✓ purrr   0.3.4
## ✓ tibble  3.1.4     ✓ dplyr   1.0.7
## ✓ tidyr   1.1.3     ✓ stringr 1.4.0
## ✓ readr   2.0.1     ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(ggplot2)
library(ggmap)
## Google's Terms of Service: https://cloud.google.com/maps-platform/terms/.
## Please cite ggmap if you use it! See citation("ggmap") for details.
library(tint)
## 
## Attaching package: 'tint'
## The following objects are masked from 'package:tufte':
## 
##     margin_note, newthought, quote_footer, sans_serif
library(rvest)
## 
## Attaching package: 'rvest'
## The following object is masked from 'package:readr':
## 
##     guess_encoding
library(patchwork)
options(htmltools.dir.version = FALSE)

How Do Players Perform Following a Mid-Season Trade?

# Regular-season batting rows from 1955 onward, with batting average
# (hits divided by at-bats) added as BA.
hittingStats <- Batting %>%
  filter(yearID >= 1955) %>%
  mutate(BA = H / AB)

# Rows for player-seasons that have more than one Batting row in the same
# year (presumably one row per stint, so these players changed teams
# mid-season -- verify against the Lahman documentation).
tradedOnly <- hittingStats %>%
  group_by(playerID, yearID) %>%
  mutate(num_entry = n()) %>% # number of rows for this player in this year
  ungroup() %>%
  filter(num_entry > 1) %>% # keep only players with several rows that year
  na.omit() # drop rows with any missing value (BA is NaN when AB is 0)

Batting Average

# One point per row of tradedOnly: batting average against season, coloured
# by stint, with a smoothed trend line drawn for each stint.
ggplot(
  data = tradedOnly,
  mapping = aes(x = yearID, y = BA, color = as.factor(stint))
) +
  geom_point() +
  geom_smooth()
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'

# Pool at-bats and hits over all rows of tradedOnly sharing a year and stint,
# then derive the combined batting average from those pooled totals.
tradedOnlyYearBA <- tradedOnly %>%
  group_by(yearID, stint) %>%
  summarise(
    yAB = sum(AB),
    yH = sum(H),
    yBA = yH / yAB
  )
## `summarise()` has grouped output by 'yearID'. You can override using the `.groups` argument.
# Figure 1: pooled batting average per year and stint for traded players,
# with a smoothed trend per stint. Caption typo ("thay") corrected.
ggplot(tradedOnlyYearBA, aes(y = yBA, x = yearID, color = as.factor(stint))) +
  geom_point() +
  geom_smooth() +
  labs(
    title = "Batting Averages before and after players were traded in each year",
    y = "Combined Batting Averages",
    x = "Year",
    caption = "Figure 1: Combined batting averages for people before they were traded and after they were traded each year.",
    color = "Stint"
  )
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

This shows that batting averages increase after a player is traded, but the gap between the pre-trade and post-trade averages is slowly closing.

Batting Average Postseason

# Postseason batting totals per player, year and team: every counting stat is
# summed over that player's BattingPost rows for the year and prefixed with P.
Postseason <- BattingPost %>%
  # Use the same 1955 cut-off as the regular-season table (yearID >= 1955);
  # the previous `> 1955` silently dropped the 1955 postseason.
  filter(yearID >= 1955) %>%
  group_by(playerID, yearID, teamID) %>%
  summarise(
    PG = sum(G),
    PAB = sum(AB),
    PR = sum(R),
    PH = sum(H),
    P2B = sum(X2B),
    P3B = sum(X3B),
    PHR = sum(HR),
    PRBI = sum(RBI),
    PSB = sum(SB),
    PCS = sum(CS),
    PBB = sum(BB),
    PSO = sum(SO),
    PIBB = sum(IBB),
    PHBP = sum(HBP),
    PSH = sum(SH),
    PSF = sum(SF),
    PGIDP = sum(GIDP),
    .groups = "drop" # return an ungrouped table; no grouping is needed later
  ) %>%
  na.omit() # drop player-seasons where any summed stat is missing
## `summarise()` has grouped output by 'playerID', 'yearID'. You can override using the `.groups` argument.
# Every row of hittingStats, tagged with num_entry: how many rows that player
# has in that year. Rows containing any missing value are then removed.
ShowTrade <- hittingStats %>%
  as_tibble() %>%
  add_count(yearID, playerID, name = "num_entry") %>%
  na.omit()

# Yearly postseason totals, split by num_entry (the number of regular-season
# Batting rows the player had that year).
PostShowTradeYear <- Postseason %>%
  left_join(
    # Join only the key columns plus num_entry, de-duplicated. ShowTrade has
    # one row per Batting row, so a player with two rows for the same team in
    # one year would otherwise duplicate the postseason row and be counted
    # twice in the sums below.
    distinct(ShowTrade, playerID, yearID, teamID, num_entry),
    by = c("playerID", "yearID", "teamID")
  ) %>%
  group_by(yearID, num_entry) %>%
  summarise(
    YPG = sum(PG),
    YPAB = sum(PAB),
    YPR = sum(PR),
    YPH = sum(PH),
    YP2B = sum(P2B),
    YP3B = sum(P3B),
    YPHR = sum(PHR),
    YPRBI = sum(PRBI),
    YPSB = sum(PSB),
    YPCS = sum(PCS),
    YPBB = sum(PBB),
    YPSO = sum(PSO),
    YPIBB = sum(PIBB),
    YPHBP = sum(PHBP),
    YPSH = sum(PSH),
    YPSF = sum(PSF),
    YPGIDP = sum(PGIDP),
    .groups = "drop"
  ) %>%
  mutate(YPBA = YPH / YPAB) %>%
  # Removes the group of postseason players with no match in ShowTrade
  # (num_entry is NA) and any group whose batting average is undefined.
  na.omit()
## `summarise()` has grouped output by 'yearID'. You can override using the `.groups` argument.
# Figure 2: pooled playoff batting average per year, coloured by num_entry.
# The caption and legend previously repeated the regular-season wording
# ("before/after traded", "Stint"), but colour here encodes num_entry, the
# number of regular-season Batting rows a player had that year.
ggplot(PostShowTradeYear, aes(y = YPBA, x = yearID, color = as.factor(num_entry))) +
  geom_point() +
  geom_smooth() +
  labs(
    title = "Playoff Batting Averages of players traded vs. Not traded",
    y = "Combined Playoff Batting Averages",
    x = "Year",
    caption = "Figure 2: Combined playoff batting averages each year, grouped by the number of regular-season stints a player had that year (more than one stint means the player changed teams mid-season).",
    color = "Regular-season stints"
  )
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

# Yearly on-base percentage, slugging percentage and OPS for traded players,
# pooled over all rows sharing a year and stint.
tradedOnlyYearOPS <- tradedOnly %>%
  mutate(
    PA = AB + BB + HBP + SF + SH,
    TOB = H + BB + HBP, # times on base
    # OBP denominator: sacrifice hits (SH) are plate appearances but are not
    # part of the on-base percentage denominator, so it is AB + BB + HBP + SF.
    OBPden = AB + BB + HBP + SF,
    Singles = H - X2B - X3B - HR,
    TB = Singles + (2 * X2B) + (3 * X3B) + (4 * HR) # total bases
  ) %>%
  group_by(yearID, stint) %>%
  summarise(
    yPA = sum(PA),
    yTOB = sum(TOB),
    yTB = sum(TB),
    yAB = sum(AB),
    yOBPden = sum(OBPden),
    .groups = "drop"
  ) %>%
  mutate(
    yOBP = yTOB / yOBPden, # previously divided by yPA, which includes SH
    ySLG = yTB / yAB,
    yOPS = yOBP + ySLG
  )
## `summarise()` has grouped output by 'yearID'. You can override using the `.groups` argument.
# Adds plate appearances, times on base, total bases, OBP, SLG and OPS to the
# yearly postseason totals in PostShowTradeYear.
PostShowTradeYearOPS <- PostShowTradeYear %>%
  mutate(
    YPPA = YPAB + YPBB + YPHBP + YPSF + YPSH,
    YPTOB = YPH + YPBB + YPHBP,
    YPSingles = YPH - YP2B - YP3B - YPHR,
    YPTB = YPSingles + (2 * YP2B) + (3 * YP3B) + (4 * YPHR),
    # On-base percentage excludes sacrifice hits (SH) from its denominator, so
    # divide by AB + BB + HBP + SF rather than by plate appearances (YPPA).
    YPOBP = YPTOB / (YPAB + YPBB + YPHBP + YPSF),
    YPSLG = YPTB / YPAB,
    YPOPS = YPOBP + YPSLG
  )
# Figure 3: pooled on-base percentage per year and stint for traded players.
# Caption typos ("Baseb", "thay") and the repeated "Figure 1" label corrected.
ggplot(tradedOnlyYearOPS, aes(y = yOBP, x = yearID, color = as.factor(stint))) +
  geom_point() +
  geom_smooth() +
  labs(
    title = "On Base Percentages before and after players were traded in each year",
    y = "Combined On Base Percentages",
    x = "Year",
    caption = "Figure 3: Combined On Base Percentages for people before they were traded and after they were traded each year.",
    color = "Stint"
  )
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

# Figure 4: pooled slugging percentage per year and stint for traded players.
# Caption typo ("thay") and the repeated "Figure 1" label corrected.
ggplot(tradedOnlyYearOPS, aes(y = ySLG, x = yearID, color = as.factor(stint))) +
  geom_point() +
  geom_smooth() +
  labs(
    title = "Slugging Percentages before and after players were traded in each year",
    y = "Combined Slugging Percentages",
    x = "Year",
    caption = "Figure 4: Combined Slugging Percentages for people before they were traded and after they were traded each year.",
    color = "Stint"
  )
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

# Figure 5: pooled OPS (on-base plus slugging) per year and stint for traded
# players. Caption typo ("thay") and the repeated "Figure 1" label corrected.
ggplot(tradedOnlyYearOPS, aes(y = yOPS, x = yearID, color = as.factor(stint))) +
  geom_point() +
  geom_smooth() +
  labs(
    title = "OPS before and after players were traded in each year",
    y = "Combined OPS",
    x = "Year",
    caption = "Figure 5: Combined OPS for people before they were traded and after they were traded each year.",
    color = "Stint"
  )
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

The following graphs show the same trends in these other stats as batting average did.

# Figure 6: pooled playoff on-base percentage per year, coloured by num_entry
# (the number of regular-season Batting rows a player had that year). The
# caption and legend previously reused the regular-season "before/after
# traded" and "Stint" wording, which does not describe this grouping.
ggplot(PostShowTradeYearOPS, aes(y = YPOBP, x = yearID, color = as.factor(num_entry))) +
  geom_point() +
  geom_smooth() +
  labs(
    title = "Playoff On Base Percentage of players traded vs. Not traded",
    y = "Combined Playoff On Base Percentage",
    x = "Year",
    caption = "Figure 6: Combined playoff on base percentage each year, grouped by the number of regular-season stints a player had that year (more than one stint means the player changed teams mid-season).",
    color = "Regular-season stints"
  )
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

# Figure 7: pooled playoff slugging percentage per year, coloured by num_entry
# (the number of regular-season Batting rows a player had that year). The
# caption and legend previously reused the regular-season "before/after
# traded" and "Stint" wording, which does not describe this grouping.
ggplot(PostShowTradeYearOPS, aes(y = YPSLG, x = yearID, color = as.factor(num_entry))) +
  geom_point() +
  geom_smooth() +
  labs(
    title = "Playoff Slugging Percentage of players traded vs. Not traded",
    y = "Combined Playoff Slugging Percentage",
    x = "Year",
    caption = "Figure 7: Combined playoff slugging percentage each year, grouped by the number of regular-season stints a player had that year (more than one stint means the player changed teams mid-season).",
    color = "Regular-season stints"
  )
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

# Figure 8: pooled playoff OPS per year, coloured by num_entry (the number of
# regular-season Batting rows a player had that year). The caption and legend
# previously reused the regular-season "before/after traded" and "Stint"
# wording, which does not describe this grouping.
ggplot(PostShowTradeYearOPS, aes(y = YPOPS, x = yearID, color = as.factor(num_entry))) +
  geom_point() +
  geom_smooth() +
  labs(
    title = "Playoff On Base Plus Slugging Percentage of players traded vs. Not traded",
    y = "Combined Playoff On Base Plus Slugging Percentage",
    x = "Year",
    caption = "Figure 8: Combined playoff on base plus slugging percentage each year, grouped by the number of regular-season stints a player had that year (more than one stint means the player changed teams mid-season).",
    color = "Regular-season stints"
  )
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'